{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0e851ea7-0d08-40a5-9ab1-a2a58cb7fd2a",
   "metadata": {},
   "source": [
    "# Building A Dialogue Feature Extraction Pipeline Using Function Calling!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "857b84af-3ca3-49fd-a525-7083a465fcd8",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "sample_data = \\\n",
    "\"\"\"Agent: Thank you for calling BrownBox Customer Support. My name is Tom. How may I assist you today?\\nCustomer: Hi Tom, I'm trying to log in to my account to purchase an Oven Toaster Grill (OTG), but I'm unable to proceed as it's asking for mobile number or email verification. Can you help me with that?\\nAgent: Sure, I can assist you with that. May I know your registered mobile number or email address, please?\\nCustomer: My registered mobile number is +1 123-456-7890.\\nAgent: Thank you. Let me check that for you. I'm sorry to inform you that we don't have this number on our records. Can you please confirm if this is the correct number?\\nCustomer: Oh, I'm sorry. I might have registered with a different number. Can you please check with my email address instead? It's johndoe@email.com.\\nAgent: Sure, let me check that for you. (After a few moments) I see that we have your email address on our records. We'll be sending you a verification code shortly. Please check your email and let me know once you receive it. Customer: Okay, I received the code. What do I do with it?\\nAgent: Please enter the verification code in the field provided and click on 'Verify'. Once your email address is verified, you'll be able to proceed with your purchase.\\nCustomer: Okay, I entered the code, and it's verified now. Thank you for your help.\\nAgent: You're welcome. Is there anything else I can assist you with?\\nCustomer: No, that's all. Thank you.\\nAgent: You're welcome. Have a great day!\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4476ac2b-25c1-447e-b121-c7b3c4c48006",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "print (sample_data)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "532155d8-d874-4d92-a17d-760210f7ed81",
   "metadata": {},
   "source": [
    "### Defining What's Important"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9dd444e1-816f-466c-a9c2-cb860b5b74bc",
   "metadata": {
    "height": 115
   },
   "outputs": [],
   "source": [
    "from utils import query_raven\n",
    "from typing import List\n",
    "from dataclasses import dataclass\n",
    "# Warning control\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "deac8cdb-ebab-410a-be75-45bbb019fead",
   "metadata": {
    "height": 302,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from dataclasses import dataclass\n",
    "schema_id = (\"agent_name\", \"customer_email\", \\\n",
    "             \"customer_order\", \"customer_phone\", \"customer_sentiment\")\n",
    "\n",
    "dataclass_schema_representation = '''\n",
    "@dataclass\n",
    "class Record:\n",
    "    agent_name : str # The agent name\n",
    "    customer_email : str # customer email if provided, else ''\n",
    "    customer_order : str # The customer order number if provided, else ''\n",
    "    customer_phone : str # the customer phone number if provided, else ''\n",
    "    customer_sentiment : str # Overall customer sentiment, either 'frustrated', or 'happy'. Always MUST have a value.\n",
    "'''\n",
    "\n",
    "# Let's call exec to insert the dataclass into our python interpreter so it understands this. \n",
    "exec(dataclass_schema_representation)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "211106d3-59d8-42c5-9a60-1bab9a6d148f",
   "metadata": {},
   "source": [
    "### Building The Database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86d6ccdb-dfa6-4b1d-96cc-8a6ccc30a754",
   "metadata": {
    "height": 608,
    "tags": []
   },
   "outputs": [],
   "source": [
    "def initialize_db():\n",
    "    import sqlite3\n",
    "\n",
    "    # Connect to SQLite database (or create it if it doesn't exist)\n",
    "    conn = sqlite3.connect('extracted.db')\n",
    "    cursor = conn.cursor()\n",
    "\n",
    "    # Fixed table name\n",
    "    table_name = \"customer_information\"\n",
    "\n",
    "    # Fixed schema\n",
    "    columns = \"\"\"\n",
    "    id INTEGER PRIMARY KEY, \n",
    "    agent_name TEXT, \n",
    "    customer_email TEXT, \n",
    "    customer_order TEXT, \n",
    "    customer_phone TEXT, \n",
    "    customer_sentiment TEXT\n",
    "    \"\"\"\n",
    "\n",
    "    # Ensure the table name is enclosed in quotes if it contains special characters\n",
    "    quoted_table_name = f'\"{table_name}\"'\n",
    "\n",
    "    # Check if a table with the exact name already exists\n",
    "    cursor.execute(f\"SELECT name FROM sqlite_master WHERE type='table' AND name={quoted_table_name}\")\n",
    "    if cursor.fetchone():\n",
    "        print(f\"Table {table_name} already exists.\")\n",
    "    else:\n",
    "        # Create the new table with the fixed schema\n",
    "        cursor.execute(f'''CREATE TABLE {quoted_table_name} ({columns})''')\n",
    "        print(f\"Table {table_name} created successfully.\")\n",
    "\n",
    "    # Commit the transaction and close the connection\n",
    "    conn.commit()\n",
    "    conn.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "027fd826-b7ed-4f3b-afa3-08940b14412b",
   "metadata": {
    "height": 47,
    "tags": []
   },
   "outputs": [],
   "source": [
    "!rm extracted.db\n",
    "initialize_db()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "08a0c009-2865-4f17-b2f7-4d290f58c68e",
   "metadata": {},
   "source": [
    "### Adding in Tools To Populate The Database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fae2d563-3643-418f-b662-c7cb7942164e",
   "metadata": {
    "height": 574,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from dataclasses import dataclass, fields\n",
    "def update_knowledge(results_list : List[Record]):\n",
    "    \"\"\"\n",
    "    Registers the information necessary\n",
    "    \"\"\"\n",
    "    import sqlite3\n",
    "    from sqlite3 import ProgrammingError\n",
    "\n",
    "    # Reconnect to the existing SQLite database\n",
    "    conn = sqlite3.connect('extracted.db')\n",
    "    cursor = conn.cursor()\n",
    "\n",
    "    # Fixed table name\n",
    "    table_name = \"customer_information\"\n",
    "\n",
    "    # Prepare SQL for inserting data with fixed column names\n",
    "    column_names = \"agent_name, customer_email, customer_order, customer_phone, customer_sentiment\"\n",
    "    placeholders = \", \".join([\"?\"] * 5) \n",
    "    sql = f\"INSERT INTO {table_name} ({column_names}) VALUES ({placeholders})\"\n",
    "\n",
    "    # Insert each record\n",
    "    for record in results_list:\n",
    "        try:\n",
    "            record_values = tuple(getattr(record, f.name) for f in fields(record))\n",
    "            cursor.execute(sql, record_values)\n",
    "        except ProgrammingError as e:\n",
    "            print(f\"Error with record. {e}\")\n",
    "            continue\n",
    "\n",
    "    # Commit the changes and close the connection\n",
    "    conn.commit()\n",
    "    conn.close()\n",
    "    print(\"Records inserted successfully.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7085f71c-6ba7-409c-b092-a08934d35738",
   "metadata": {
    "height": 64,
    "tags": []
   },
   "outputs": [],
   "source": [
    "my_record = Record(agent_name = \"Agent Smith\", \\\n",
    "                   customer_email = \"\", customer_order = \"12346\", \\\n",
    "                   customer_phone = \"\", customer_sentiment = \"happy\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f6770393-3a32-4977-9775-67c711542f7a",
   "metadata": {
    "height": 30,
    "tags": []
   },
   "outputs": [],
   "source": [
    "update_knowledge([my_record])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e6ff129-1ad4-4ca1-92dc-6950bb256670",
   "metadata": {},
   "source": [
    "### Building Tools To Pull Information Out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "148433d3-b10b-4815-b7f6-6cd2b82f85eb",
   "metadata": {
    "height": 438,
    "tags": []
   },
   "outputs": [],
   "source": [
    "import sqlite3\n",
    "def execute_sql(sql: str):\n",
    "    \"\"\" Runs SQL code for the given schema. Make sure to properly leverage the schema to answer the user's question in the best way possible. \"\"\"\n",
    "    # Fixed table name, assuming it's not dynamically generated anymore\n",
    "    table_name = \"customer_information\"\n",
    "\n",
    "    # Establish a connection to the database\n",
    "    conn = sqlite3.connect('extracted.db')\n",
    "    cursor = conn.cursor()\n",
    "\n",
    "    # Execute the SQL statement\n",
    "    cursor.execute(sql)\n",
    "\n",
    "    # Initialize an empty list to hold query results\n",
    "    results = []\n",
    "\n",
    "    results = cursor.fetchall()\n",
    "    print(\"Query operation executed successfully. Number of rows returned:\", len(results))\n",
    "\n",
    "    # Close the connection to the database\n",
    "    conn.close()\n",
    "\n",
    "    # Return the results for SELECT operations; otherwise, return an empty list\n",
    "    return results\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c50be4d-8792-4c2c-b4d2-1ee6faae5b56",
   "metadata": {
    "height": 166,
    "tags": []
   },
   "outputs": [],
   "source": [
    "sql = '''\n",
    "    SELECT agent_name \n",
    "        FROM customer_information\n",
    "        WHERE customer_sentiment = \"happy\"\n",
    "    '''\n",
    "# Print the final SQL command for debugging\n",
    "print(\"Executing SQL:\", sql)\n",
    "\n",
    "execute_sql(sql)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a2e6703f-60cf-4ae2-bc61-afe8ad66d5be",
   "metadata": {},
   "source": [
    "-----"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "51644729-ea6f-4fbf-ac99-08308c998cc1",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Building The Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31af1526-330b-467c-b707-b13b70bfe663",
   "metadata": {
    "height": 47,
    "tags": []
   },
   "outputs": [],
   "source": [
    "!rm extracted.db\n",
    "initialize_db()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7ae3b10-f302-4614-8b04-3d6d2065f8b0",
   "metadata": {},
   "source": [
    "##### Attribution:\n",
    "We will be using a handful of samples (~10-15 samples) in this lesson from a publically-available customer_service_chatbot on HuggingFace.\n",
    "The link to the public dataset is here: https://huggingface.co/datasets/SantiagoPG/customer_service_chatbot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4b4ecd7-6822-4fb5-82ce-06ef221910ec",
   "metadata": {
    "height": 98,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from datasets import load_dataset\n",
    "import os\n",
    "\n",
    "cwd = os.getcwd()\n",
    "dialogue_data = load_dataset(cwd + \"/data/customer_service_chatbot\", cache_dir=\"./cache\")[\"train\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4130ed17-b019-483f-913a-c45776e28560",
   "metadata": {
    "height": 64,
    "tags": []
   },
   "outputs": [],
   "source": [
    "sample_zero = dialogue_data[6]\n",
    "dialogue_string = sample_zero[\"conversation\"].replace(\"\\n\\n\", \"\\n\")\n",
    "print (dialogue_string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9bcceaf5-7be1-4e15-bdd6-24c37d72fa64",
   "metadata": {
    "height": 183,
    "tags": []
   },
   "outputs": [],
   "source": [
    "import inspect\n",
    "\n",
    "prompt = \"\\n\" + dialogue_string\n",
    "\n",
    "signature = inspect.signature(update_knowledge)\n",
    "signature = str(signature).replace(\"__main__.Record\", \"Record\")\n",
    "docstring = update_knowledge.__doc__\n",
    "\n",
    "raven_prompt = f'''{dataclass_schema_representation}\\nFunction:\\n{update_knowledge.__name__}{signature}\\n    \"\"\"{docstring}\"\"\"\\n\\n\\nUser Query:{prompt}<human_end>'''\n",
    "print (raven_prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80ef5b38-f117-4273-8204-132efcf61e09",
   "metadata": {
    "height": 47,
    "tags": []
   },
   "outputs": [],
   "source": [
    "raven_call = query_raven(raven_prompt)\n",
    "print (raven_call)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fcfc6880-a5e2-4ed0-824c-fe442ecf3ca4",
   "metadata": {
    "height": 30,
    "tags": []
   },
   "outputs": [],
   "source": [
    "exec(raven_call)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb28ea92-4540-47e2-acbb-ef1f0c44ed19",
   "metadata": {
    "height": 251,
    "tags": []
   },
   "outputs": [],
   "source": [
    "import inspect\n",
    "\n",
    "sample_zero = dialogue_data[10]\n",
    "dialogue_string = sample_zero[\"conversation\"].replace(\"\\n\\n\", \"\\n\")\n",
    "\n",
    "prompt = \"\\n\" + dialogue_string\n",
    "signature = inspect.signature(update_knowledge)\n",
    "docstring = update_knowledge.__doc__\n",
    "raven_prompt = f'''{dataclass_schema_representation}\\nFunction:\\n{update_knowledge.__name__}{signature}\\n    \"\"\"{docstring}\"\"\"\\n\\n{prompt}<human_end>'''\n",
    "\n",
    "raven_call = query_raven(raven_prompt)\n",
    "print (raven_call)\n",
    "exec(raven_call)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0e719ae-c35d-4659-85c2-59af988af124",
   "metadata": {
    "height": 115,
    "tags": []
   },
   "outputs": [],
   "source": [
    "execute_sql(\n",
    "    '''\n",
    "    SELECT COUNT(customer_sentiment) \n",
    "    FROM customer_information\n",
    "    WHERE agent_name = \"John\" AND customer_sentiment = \"happy\"\n",
    "    ''')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57dfa8db-7286-414d-a769-e6ad6dfb7474",
   "metadata": {
    "height": 370,
    "tags": []
   },
   "outputs": [],
   "source": [
    "prompt = \"how many customers John has made happy.\"\n",
    "\n",
    "signature = inspect.signature(execute_sql)\n",
    "\n",
    "docstring = execute_sql.__doc__\n",
    "\n",
    "sql_schema_representation = \\\n",
    "\"\"\"\n",
    "CREATE TABLE customer_information (\n",
    "    id INTEGER PRIMARY KEY AUTOINCREMENT,\n",
    "    agent_name TEXT,\n",
    "    customer_email TEXT,\n",
    "    customer_order TEXT,\n",
    "    customer_phone TEXT,\n",
    "    customer_sentiment TEXT\n",
    ");\n",
    "\"\"\"\n",
    "\n",
    "raven_prompt = f'''{sql_schema_representation}\\nFunction:\\n{execute_sql.__name__}{signature}\\n    \"\"\"{docstring}\"\"\"\\n\\n\\nUser Query:{prompt}<human_end>'''\n",
    "\n",
    "print (raven_prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "963ef081-30ac-4813-b17c-ef9dcf1ecdd8",
   "metadata": {
    "height": 64,
    "tags": []
   },
   "outputs": [],
   "source": [
    "raven_call = query_raven(raven_prompt)\n",
    "\n",
    "print (raven_call)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80cd0082-e0a2-4133-8018-e57cf5fca8f6",
   "metadata": {
    "height": 30,
    "tags": []
   },
   "outputs": [],
   "source": [
    "eval(raven_call)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37241723-ec18-42f5-a8dc-16d9ac8998e0",
   "metadata": {
    "height": 47,
    "tags": []
   },
   "outputs": [],
   "source": [
    "!rm extracted.db\n",
    "initialize_db()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d17f5dd-4879-49b7-a7b4-a1a970efa45f",
   "metadata": {
    "height": 251,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "for i in tqdm(range(0, 10)):\n",
    "    data = dialogue_data[i]\n",
    "    dialogue_string = data[\"conversation\"].replace(\"\\n\\n\", \"\\n\")\n",
    "    \n",
    "    # Ask Raven to extract the information we want out of this dialogue. \n",
    "    prompt = \"\\n\" + dialogue_string\n",
    "    signature = inspect.signature(update_knowledge)\n",
    "    docstring = update_knowledge.__doc__\n",
    "    raven_prompt = f'''{dataclass_schema_representation}\\nFunction:\\n{update_knowledge.__name__}{signature}\\n    \"\"\"{docstring}\"\"\"\\n\\n\\nUser Query:{prompt}<human_end>'''\n",
    "    raven_call = query_raven(raven_prompt)\n",
    "    print (raven_call)\n",
    "    exec(raven_call)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c22fce8-ddf3-4d4b-9d71-fab489581884",
   "metadata": {
    "height": 404,
    "tags": []
   },
   "outputs": [],
   "source": [
    "signature = inspect.signature(execute_sql)\n",
    "\n",
    "docstring = execute_sql.__doc__\n",
    "\n",
    "schema_representation = \\\n",
    "\"\"\"\n",
    "CREATE TABLE customer_information (\n",
    "    id INTEGER PRIMARY KEY AUTOINCREMENT,\n",
    "    agent_name TEXT,\n",
    "    customer_email TEXT,\n",
    "    customer_order TEXT,\n",
    "    customer_phone TEXT,\n",
    "    customer_sentiment TEXT\n",
    ");\n",
    "\"\"\"\n",
    "\n",
    "raven_prompt = f'''{schema_representation}\\nFunction:\\n{execute_sql.__name__}{signature}\\n    \"\"\"{docstring}\"\"\"\\n\\n\\n'''\n",
    "raven_prompt = raven_prompt + \"User Query: How many happy customers?<human_end>\"\n",
    "print (raven_prompt)\n",
    "raven_call = query_raven(raven_prompt)\n",
    "\n",
    "print (raven_call)\n",
    "eval(raven_call)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de0a70a9-1cb1-4d70-8da7-8450b467fd8c",
   "metadata": {
    "height": 183,
    "tags": []
   },
   "outputs": [],
   "source": [
    "raven_prompt = f'''{schema_representation}\\nFunction:\\n{execute_sql.__name__}{signature}\\n    \"\"\"{docstring}\"\"\"\\n\\n\\n'''\n",
    "raven_prompt = raven_prompt + \\\n",
    "\"User Query: Give me the names and phone numbers of the ones\"\\\n",
    "\"who are frustrated and the order numbers?<human_end>\"\n",
    "\n",
    "print (raven_prompt)\n",
    "raven_call = query_raven(raven_prompt)\n",
    "\n",
    "print (raven_call)\n",
    "eval(raven_call)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "958c3275-5edb-484d-a743-680f8bcb9173",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
